library(readr)
library(tidyverse)
library(scales)
library(plotly)
Full_centrality <- read_csv("Full_centrality.csv",
col_types = cols(X1 = col_skip(),
page_id = col_character(),
page_name = col_character(),
week = col_date(format = "%Y-%m-%d")))
page_name_map = read_csv("DATA/1000-page-info.csv",
col_types = cols(
page_id = col_character(),
page_name = col_character()
)
)[,1:2]
Full_centrality = Full_centrality %>% left_join(page_name_map, by='page_id') %>%relocate(page_name, .after = page_id)
get_top_10 = function(centrality){
Full_centrality %>%
filter(week == min(week)) %>%
arrange(desc(.data[[centrality]])) %>%
select(page_id) %>% head(10) %>% pull
}
plot_top_10 = function(top_10_list, centrality){
top10.centrality.all = Full_centrality %>% filter(page_id %in% top_10_list) %>% select(page_name,.data[[centrality]], week)
(top10.centrality.all %>% ggplot() +
geom_line(aes(y = .data[[centrality]], x = week, color=page_name)) +
scale_x_date(labels = date_format("%Y-%m"))) %>% ggplotly
}
t10 = get_top_10('degree_centrality')
plot_top_10(t10, 'degree_centrality')
簡而言之就是總觸及率
centrality = 'degree_centrality'
top.10.deg = get_top_10(centrality)
plot_top_10(top.10.deg, centrality)
與結點互動的對象中心性越高,自己的中心性就會越高
\[ C_E^{user} = \frac{1}{\lambda} \sum_{p \in page} C_E^{page}(p) a_{ip} \\ C_E^{page} = \frac{1}{\lambda} \sum_{u \in user} C_E^{user}(u) a_{iu} \]
centrality = 'eigenvector_centrality'
top.10.eig = get_top_10(centrality)
plot_top_10(top.10.eig, centrality)
centrality = 'unweighted_eigenvector_centrality'
top.10.unw.eig = get_top_10(centrality)
plot_top_10(top.10.unw.eig, centrality)
Narmalize 後容易被 outlire 影響(10月上下,有一個用戶特別勤奮對其中一個粉專按讚,則他與那個粉專的中心性都會增加)
到其他節點的平均距離(次數)越高,中心性越小。
centrality = 'closeness_centrality'
top.10.cls = get_top_10(centrality)
plot_top_10(top.10.cls, centrality)
互動次數在計算過程中不具意義,較不具代表性的衡量
如果以其中一個粉專/用戶作為消息來源起點(source),另外一個粉專/用戶作為消息終點(sink),則關注的節點對於資訊流量的貢獻有多少?
該節點在每一對節點之間的流量總和,就是Current flow betweenness centrality。
計算上模擬電流運作 https://tinyurl.com/27dcmgj5
#out.width="100%"}
top10.unweighted.eig.centrality.week_other = Full_centrality %>% filter(week == '2016-09-25') %>% arrange(desc(unweighted_eigenvector_centrality)) %>% select(page_name) %>% head(3) %>% pull
top10.unweighted.eig.centrality.all = Full_centrality %>% filter(page_name %in% c(top.10.unw.eig[1:3], top10.unweighted.eig.centrality.week_other)) %>% select(page_name,unweighted_eigenvector_centrality, week)
top10.unweighted.eig.centrality.all %>% ggplot() +
geom_line(aes(y = unweighted_eigenvector_centrality, x = week, color=page_name)) +
scale_x_date(labels = date_format("%Y-%m-%d"))
#ggplotly(p)
#out.width="100%"}
top10.eig.centrality.week_other = Full_centrality %>% filter(week == '2016-10-02') %>% arrange(desc(eigenvector_centrality)) %>% select(page_name) %>% head(4) %>% pull
top10.eig.centrality.all = Full_centrality %>% filter(page_name %in% c(top.10.eig[1:4], top10.eig.centrality.week_other)) %>% select(page_name,unweighted_eigenvector_centrality, week)
top10.eig.centrality.all %>% ggplot() +
geom_line(aes(y = unweighted_eigenvector_centrality, x = week, color=page_name)) +
scale_x_date(labels = date_format("%Y-%m-%d"))
#ggplotly(p)
將用戶粉專互動的社會網路投影到只有粉專的社會網路上,並以此進行community detection(透過 Louvain algorithm),可以大致的偵測出支持川普陣營的粉專,以及希拉蕊陣營的粉專。
以下將各個陣營的粉專的各種中心性取平均,畫出隨時間的變化圖
library(readr)
avg_centrality_panel <- read_csv("avg_centrality_panel.csv",
col_types = cols(week = col_date(format = "%Y-%m-%d")))
avg_centrality_panel$community = factor(
avg_centrality_panel$community,
levels = c('Trump', 'Clinton','others'))
plot_avg_centrality = function(col_name, title_name){
avg_centrality_panel %>% ggplot() +
geom_line(aes(x = week, y = .data[[col_name]], color = community)) +
scale_x_date(labels = date_format("%Y-%m-%d")) +
ggtitle(paste(title_name, "Change")) +
ylab(title_name)
}
plot_avg_centrality('degree_centrality',
'Degree Centrality')
plot_avg_centrality('eigenvector_centrality',
'Eigenvector Centrality')
plot_avg_centrality('unweighted_eigenvector_centrality',
'Unweighted Eigenvector Centrality')
plot_avg_centrality('closeness_centrality',
'Closeness Centrality')
Full_centrality <- read_csv("Result/without_politicians/Full_centrality.csv",
col_types = cols(X1 = col_skip(),
page_id = col_character(),
page_name = col_character(),
week = col_date(format = "%Y-%m-%d")))
## Warning: Missing column names filled in: 'X1' [1]
## Warning: The following named parsers don't match the column names: page_name
page_name_map = read_csv("DATA/1000-page-info.csv",
col_types = cols(
page_id = col_character(),
page_name = col_character()
)
)[,1:2]
Full_centrality = Full_centrality %>% left_join(page_name_map, by='page_id') %>%relocate(page_name, .after = page_id)
centrality = 'degree_centrality'
top.10.deg = get_top_10(centrality)
plot_top_10(top.10.deg, centrality)
centrality = 'eigenvector_centrality'
top.10.eig = get_top_10(centrality)
plot_top_10(top.10.eig, centrality)
centrality = 'unweighted_eigenvector_centrality'
top.10.unw.eig = get_top_10(centrality)
plot_top_10(top.10.unw.eig, centrality)
centrality = 'closeness_centrality'
top.10.cls = get_top_10(centrality)
plot_top_10(top.10.cls, centrality)
將四種不同中心性進行panel data regression \[ \text{centrality}_{it} = \alpha + \text{fake_count_cum}_{it} \times \beta + \text{is_debate}_{it} \times \gamma + u_i \] 其中 $t 紀錄了總統大選辯論會的時間,觀察到辯論會時eigenvector centrality有大幅下降,需要把這個因子納入考量,以研究假新聞對中心性的影響。 而{it} 則紀錄了過去累計發布假新聞數量。我假設為線性增加,但centrality實際上會被Normalized,所以可以考慮取對數。
我根據粉專類別(type) 進行迴歸。
figure 例如 Trump 等政治人物或記者
group 例如 Occupy Democrats 等政府/非政府組織粉專
media 例如 CNN 等媒體。底下又有細分電視、網路、報紙等
others 演員、喜劇、歌手等
| (1) | (2) | (3) | (4) | (5) | (6) | (7) | (8) | |
| figure | group | media | others | figure | group | media | others | |
| fake_posts_cum | 0.000522*** | 0.00000263 | 0.0000325 | -0.0000186 | 0.000514*** | 0.00000246 | 0.0000321 | -0.0000186 |
| (4.33) | (0.33) | (1.95) | (-0.56) | (4.27) | (0.31) | (1.93) | (-0.56) | |
| is_debate | 0.000914** | 0.000109 | 0.000148 | -0.000118 | ||||
| (2.89) | (1.02) | (1.71) | (-0.56) | |||||
| _cons | 0.00524*** | 0.00269*** | 0.00388*** | 0.00300*** | 0.00517*** | 0.00268*** | 0.00387*** | 0.00301*** |
| (6.57) | (6.38) | (13.64) | (3.87) | (6.48) | (6.35) | (13.59) | (3.85) | |
| N | 1815 | 2933 | 7804 | 567 | 1815 | 2933 | 7804 | 567 |
|
t statistics in parentheses
* p < 0.05, ** p < 0.01, *** p < 0.001 | ||||||||
| (1) | (2) | (3) | (4) | (5) | (6) | (7) | (8) | |
| figure | group | media | others | figure | group | media | others | |
| fake_posts_cum | -0.000371 | -0.0000934 | -0.000325* | 0.0000110 | -0.000307 | -0.0000970 | -0.000319* | 0.0000112 |
| (-0.48) | (-0.72) | (-2.46) | (0.17) | (-0.40) | (-0.75) | (-2.42) | (0.18) | |
| is_debate | -0.00785*** | 0.00293 | -0.00293*** | -0.000619 | ||||
| (-3.77) | (1.46) | (-4.16) | (-1.48) | |||||
| _cons | 0.0119*** | 0.00524*** | 0.00636*** | 0.00168** | 0.0124*** | 0.00504*** | 0.00656*** | 0.00173** |
| (3.85) | (4.51) | (5.75) | (3.11) | (4.01) | (4.29) | (5.93) | (3.17) | |
| N | 1815 | 2933 | 7804 | 567 | 1815 | 2933 | 7804 | 567 |
|
t statistics in parentheses
* p < 0.05, ** p < 0.01, *** p < 0.001 | ||||||||
| (1) | (2) | (3) | (4) | (5) | (6) | (7) | (8) | |
| figure | group | media | others | figure | group | media | others | |
| fake_posts_cum | 0.00131* | -0.000126* | -0.000178* | -0.0000498 | 0.00131* | -0.000128** | -0.000180* | -0.0000498 |
| (2.31) | (-2.53) | (-2.31) | (-0.47) | (2.31) | (-2.59) | (-2.35) | (-0.47) | |
| is_debate | -0.000296 | 0.00173* | 0.00112** | 0.000301 | ||||
| (-0.20) | (2.57) | (2.79) | (0.44) | |||||
| _cons | 0.0210*** | 0.00967*** | 0.0124*** | 0.00582*** | 0.0210*** | 0.00955*** | 0.0123*** | 0.00580*** |
| (6.01) | (8.27) | (10.16) | (4.08) | (5.99) | (8.14) | (10.08) | (4.04) | |
| N | 1815 | 2933 | 7804 | 567 | 1815 | 2933 | 7804 | 567 |
|
t statistics in parentheses
* p < 0.05, ** p < 0.01, *** p < 0.001 | ||||||||
| (1) | (2) | (3) | (4) | (5) | (6) | (7) | (8) | |
| figure | group | media | others | figure | group | media | others | |
| fake_posts_cum | 0.00163*** | 0.000318*** | 0.000204 | 0.000520 | 0.00162*** | 0.000317*** | 0.000201 | 0.000519 |
| (4.10) | (4.87) | (1.79) | (0.69) | (4.07) | (4.84) | (1.75) | (0.69) | |
| is_debate | 0.00117 | 0.000550 | 0.00154* | -0.00628 | ||||
| (1.11) | (0.62) | (2.55) | (-1.13) | |||||
| _cons | 0.301*** | 0.297*** | 0.310*** | 0.305*** | 0.301*** | 0.297*** | 0.310*** | 0.305*** |
| (126.17) | (184.50) | (291.33) | (87.45) | (125.99) | (186.44) | (291.18) | (86.77) | |
| N | 1815 | 2933 | 7804 | 567 | 1815 | 2933 | 7804 | 567 |
|
t statistics in parentheses
* p < 0.05, ** p < 0.01, *** p < 0.001 | ||||||||